import requests

# Goal: scrape every chapter title and chapter text of "Romance of the Three Kingdoms" from https://www.shicimingju.com/book/sanguoyanyi.html
from bs4 import BeautifulSoup

def _fetch_soup(session, url, timeout=10):
    """Download *url* through *session* and return it parsed with lxml.

    Args:
        session: a ``requests.Session`` carrying the headers to send.
        url: absolute URL of the page to download.
        timeout: seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        A ``BeautifulSoup`` tree of the response body.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status (via ``raise_for_status``).
    """
    response = session.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page as content.
    response.raise_for_status()
    # Force UTF-8 decoding rather than relying on the encoding requests
    # guesses from the response headers.
    response.encoding = 'utf-8'
    return BeautifulSoup(response.text, 'lxml')


def main():
    """Scrape all chapter titles and texts into ``./sanguo.txt``.

    Fetches the table-of-contents page, follows each chapter link found
    under ``.book-mulu li``, extracts the ``div.chapter_content`` text and
    writes one ``title:content`` record per chapter. A chapter that cannot
    be downloaded or has no content block is reported and skipped so that
    one bad page does not abort the whole run.

    Raises:
        requests.RequestException: if the table-of-contents page itself
            cannot be fetched.
    """
    # Local import: only this entry point needs it.
    from urllib.parse import urljoin

    # Table-of-contents page listing every chapter.
    url = 'https://www.shicimingju.com/book/sanguoyanyi.html'

    # UA spoofing: present a regular browser User-Agent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
    }

    # One session reuses the underlying connection for all chapter requests.
    with requests.Session() as session:
        session.headers.update(headers)

        # Fetch the index before opening the output file so a failure here
        # does not truncate an existing sanguo.txt.
        index_soup = _fetch_soup(session, url)
        li_list = index_soup.select('.book-mulu li')

        # The context manager guarantees the file is flushed and closed
        # even if an iteration raises.
        with open('./sanguo.txt', 'w', encoding='utf-8') as fp:
            for li in li_list:
                anchor = li.a
                if anchor is None or not anchor.get('href'):
                    # List item without a usable link: nothing to fetch.
                    continue

                # get_text() always returns a str, whereas .string is None
                # when the anchor contains nested tags.
                title = anchor.get_text()
                # urljoin handles root-relative, relative and absolute hrefs.
                detail_url = urljoin(url, anchor['href'])

                # Request the detail page and parse out the chapter content.
                try:
                    detail_page_soup = _fetch_soup(session, detail_url)
                except requests.RequestException as exc:
                    # "scrape failed"
                    print(title + "     爬取失败: " + str(exc))
                    continue

                div_tag = detail_page_soup.find('div', class_='chapter_content')
                if div_tag is None:
                    # "chapter content not found, skipped"
                    print(title + "     未找到章节内容, 已跳过")
                    continue

                fp.write(title + ":" + div_tag.text + '\n')
                print(title + "     爬取成功!!!")


if __name__ == '__main__':
    main()
